{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 4 Pandas的索引操作"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.147270Z",
     "start_time": "2025-03-03T06:18:26.515975Z"
    }
   },
   "outputs": [],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "source": [
    "\n",
    "dict_data = {'A': 1,\n",
    "             'B': pd.Timestamp('20190926'),\n",
    "             'C': pd.Series(1, index=list(range(4)),dtype='float32'),\n",
    "             'D': np.array([1,2,3,4],dtype='int32'),\n",
    "             'E': [\"Python\",\"Java\",\"C++\",\"C\"],\n",
    "             'F': 'wangdao' }\n",
    "df_obj2 = pd.DataFrame(dict_data)\n",
    "print(df_obj2.index)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.156638Z",
     "start_time": "2025-03-03T06:18:27.149290Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index([0, 1, 2, 3], dtype='int64')\n"
     ]
    }
   ],
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "source": [
    "# 索引对象的值不可变（上面代码增加）\n",
    "# df_obj2.index[0] = 2"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.163148Z",
     "start_time": "2025-03-03T06:18:27.157641Z"
    }
   },
   "outputs": [],
   "execution_count": 3
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 3 常见的Index种类\n",
    "•Index，索引  可以是各种类型\n",
    "•Int64Index，整数索引\n",
    "•MultiIndex，层级索引，难度较大\n",
    "•DatetimeIndex，时间戳类型"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "ser_obj = pd.Series(range(5), index = list(\"abcde\"))\n",
    "print(ser_obj)\n",
    "ser_obj.index"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.174738Z",
     "start_time": "2025-03-03T06:18:27.164157Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['a', 'b', 'c', 'd', 'e'], dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "source": [
    "# 行索引，不仅可以用索引名，可以用索引位置或来取\n",
    "print(ser_obj['b']) #索引名\n",
    "print(ser_obj[2]) #位置索引 不规范写法  正确是.iloc[2]"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.182086Z",
     "start_time": "2025-03-03T06:18:27.176741Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Lenovo\\AppData\\Local\\Temp\\ipykernel_21816\\2834633063.py:3: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(ser_obj[2]) #位置索引\n"
     ]
    }
   ],
   "execution_count": 5
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.189330Z",
     "start_time": "2025-03-03T06:18:27.184089Z"
    }
   },
   "cell_type": "code",
   "source": [
    "print(ser_obj.loc['b']) #索引名\n",
    "print(ser_obj.iloc[2]) #位置索引"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n"
     ]
    }
   ],
   "execution_count": 6
  },
  {
   "cell_type": "code",
   "source": [
    "# 切片索引\n",
    "print(ser_obj.iloc[1:3])  #索引位置取数据，左闭右开\n",
    "print(ser_obj.loc['b':'d'])  #记住索引名  左闭右闭"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.199682Z",
     "start_time": "2025-03-03T06:18:27.190333Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "b    1\n",
      "c    2\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 7
  },
  {
   "cell_type": "code",
   "source": [
    "# 不连续索引\n",
    "print(ser_obj.iloc[[0, 2, 4]])\n",
    "print(ser_obj.loc[['a', 'e']])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.208740Z",
     "start_time": "2025-03-03T06:18:27.200746Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "c    2\n",
      "e    4\n",
      "dtype: int64\n",
      "a    0\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 8
  },
  {
   "cell_type": "code",
   "source": [
    "# 布尔索引\n",
    "ser_bool = ser_obj > 2\n",
    "print(ser_obj)\n",
    "print(ser_bool)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.215591Z",
     "start_time": "2025-03-03T06:18:27.209748Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "a    False\n",
      "b    False\n",
      "c    False\n",
      "d     True\n",
      "e     True\n",
      "dtype: bool\n"
     ]
    }
   ],
   "execution_count": 9
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.222356Z",
     "start_time": "2025-03-03T06:18:27.216587Z"
    }
   },
   "cell_type": "code",
   "source": [
    "print('-'*50)\n",
    "print(ser_obj[ser_bool])\n",
    "\n",
    "print(ser_obj[ser_obj > 2]) #取出大于2的元素"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 10
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 4.4 DataFrame索引"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import numpy as np\n",
    "df_obj = pd.DataFrame(np.random.randn(5,4),\n",
    "                      columns = ['a', 'b', 'c', 'd'])\n",
    "print(df_obj.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.233545Z",
     "start_time": "2025-03-03T06:18:27.224359Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "0 -0.103309  1.700561 -0.677380  0.145718\n",
      "1 -0.131567  1.618122  0.163675 -1.043510\n",
      "2 -1.020225 -1.114694 -0.299003 -1.334290\n",
      "3 -0.034927  0.143654  0.374958  2.697998\n",
      "4  1.122483  0.792727  1.194391 -0.860591\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "cell_type": "code",
   "source": [
    "# 列索引\n",
    "print(df_obj['a']) # 返回Series类型\n",
    "print('-'*50)\n",
    "print(df_obj[['a']]) # 返回DataFrame类型\n",
    "print('-'*50)\n",
    "print(type(df_obj[['a']])) # 返回DataFrame类型"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.243599Z",
     "start_time": "2025-03-03T06:18:27.234551Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0   -0.103309\n",
      "1   -0.131567\n",
      "2   -1.020225\n",
      "3   -0.034927\n",
      "4    1.122483\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n",
      "          a\n",
      "0 -0.103309\n",
      "1 -0.131567\n",
      "2 -1.020225\n",
      "3 -0.034927\n",
      "4  1.122483\n",
      "--------------------------------------------------\n",
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    }
   ],
   "execution_count": 12
  },
  {
   "cell_type": "markdown",
   "source": [
    "1. loc 标签索引(通过索引标签值获取数据)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# 标签索引 loc，建议使用loc，效率更高\n",
    "# Series\n",
    "print(ser_obj)\n",
    "print(ser_obj['b':'d'])\n",
    "print(ser_obj.loc['b':'d']) #前闭后闭\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.253424Z",
     "start_time": "2025-03-03T06:18:27.245547Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "execution_count": 13
  },
  {
   "cell_type": "code",
   "source": [
    "# DataFrame\n",
    "df_obj = pd.DataFrame(np.random.randn(5,4),\n",
    "                      columns = list('abcd'),\n",
    "                      index=list('abcde'))\n",
    "print(df_obj)\n",
    "print('-'*50)\n",
    "print(df_obj['a'])  #建议不用,拿的是列\n",
    "print('-'*50)\n",
    "print(df_obj.loc['a'])  #拿的是行\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.263949Z",
     "start_time": "2025-03-03T06:18:27.255949Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "a  0.445561  1.760271  0.016399  1.055019\n",
      "b  1.745069 -0.186692 -2.860581  2.562556\n",
      "c -1.485804  0.815871  0.839335 -0.972042\n",
      "d -0.993903  0.393568 -0.315029  0.990964\n",
      "e -0.207641  0.780447  0.198634 -0.226742\n",
      "--------------------------------------------------\n",
      "a    0.445561\n",
      "b    1.745069\n",
      "c   -1.485804\n",
      "d   -0.993903\n",
      "e   -0.207641\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n",
      "a    0.445561\n",
      "b    1.760271\n",
      "c    0.016399\n",
      "d    1.055019\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "execution_count": 14
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.277205Z",
     "start_time": "2025-03-03T06:18:27.264949Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 第一个参数索引行，第二个参数是列,loc或者iloc效率高于直接用取下标的方式，前闭后闭\n",
    "print(df_obj.loc['a':'c', 'b':'d']) #连续索引\n",
    "print(df_obj.loc[['a','c'], ['b','d']]) #不连续索引\n",
    "print(df_obj.loc[['c'],['b']]) #取一个值,返回的是DataFrame类型\n",
    "print(df_obj.loc['c','b'])  #取一个值"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          b         c         d\n",
      "a  1.760271  0.016399  1.055019\n",
      "b -0.186692 -2.860581  2.562556\n",
      "c  0.815871  0.839335 -0.972042\n",
      "          b         d\n",
      "a  1.760271  1.055019\n",
      "c  0.815871 -0.972042\n",
      "          b\n",
      "c  0.815871\n",
      "0.8158714006796811\n"
     ]
    }
   ],
   "execution_count": 15
  },
  {
   "cell_type": "markdown",
   "source": [
    "## iloc 位置索引(推荐使用)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "ser_obj\n",
    "print('-'*50)\n",
    "# Series\n",
    "print(ser_obj[1:3])\n",
    "print('-'*50)\n",
    "print(ser_obj.iloc[1:3]) # 前闭后开[)，效率高\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.285311Z",
     "start_time": "2025-03-03T06:18:27.279208Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "b    1\n",
      "c    2\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "b    1\n",
      "c    2\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 16
  },
  {
   "cell_type": "code",
   "source": [
    "df_obj"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.297376Z",
     "start_time": "2025-03-03T06:18:27.287314Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          a         b         c         d\n",
       "a  0.445561  1.760271  0.016399  1.055019\n",
       "b  1.745069 -0.186692 -2.860581  2.562556\n",
       "c -1.485804  0.815871  0.839335 -0.972042\n",
       "d -0.993903  0.393568 -0.315029  0.990964\n",
       "e -0.207641  0.780447  0.198634 -0.226742"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.445561</td>\n",
       "      <td>1.760271</td>\n",
       "      <td>0.016399</td>\n",
       "      <td>1.055019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1.745069</td>\n",
       "      <td>-0.186692</td>\n",
       "      <td>-2.860581</td>\n",
       "      <td>2.562556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>-1.485804</td>\n",
       "      <td>0.815871</td>\n",
       "      <td>0.839335</td>\n",
       "      <td>-0.972042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>-0.993903</td>\n",
       "      <td>0.393568</td>\n",
       "      <td>-0.315029</td>\n",
       "      <td>0.990964</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>-0.207641</td>\n",
       "      <td>0.780447</td>\n",
       "      <td>0.198634</td>\n",
       "      <td>-0.226742</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 17
  },
  {
   "cell_type": "code",
   "source": [
    "\n",
    "# DataFrame，iloc是前闭后开[)\n",
    "print(df_obj)\n",
    "print('-'*50)\n",
    "print(df_obj.iloc[0:2, 0:2]) \n",
    "print('-'*50)\n",
    "print(df_obj.iloc[[0,2], [0,2]]) # 不连续索引\n",
    "print('-'*50)\n",
    "print(df_obj.iloc[0,0]) # 取一个值"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.308183Z",
     "start_time": "2025-03-03T06:18:27.299379Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "a  0.445561  1.760271  0.016399  1.055019\n",
      "b  1.745069 -0.186692 -2.860581  2.562556\n",
      "c -1.485804  0.815871  0.839335 -0.972042\n",
      "d -0.993903  0.393568 -0.315029  0.990964\n",
      "e -0.207641  0.780447  0.198634 -0.226742\n",
      "--------------------------------------------------\n",
      "          a         b\n",
      "a  0.445561  1.760271\n",
      "b  1.745069 -0.186692\n",
      "--------------------------------------------------\n",
      "          a         c\n",
      "a  0.445561  0.016399\n",
      "c -1.485804  0.839335\n",
      "--------------------------------------------------\n",
      "0.44556079538934157\n"
     ]
    }
   ],
   "execution_count": 18
  },
  {
   "cell_type": "code",
   "source": [
    "#没有设置行和列索引的DataFrame，iloc和loc的区别\n",
    "df_obj2 = pd.DataFrame(np.random.randn(5,4))\n",
    "print(df_obj2)\n",
    "print('-'*50)\n",
    "print(df_obj2.iloc[0:2]) #左闭右开 2行\n",
    "print('-'*50)\n",
    "print(df_obj2.loc[0:2]) #左闭右闭 3行"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.320546Z",
     "start_time": "2025-03-03T06:18:27.309171Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0  0.040860  0.762460  1.559588  0.263520\n",
      "1  0.847435 -0.000017  1.079594 -0.031536\n",
      "2 -1.401613 -0.228747  0.267186  0.416540\n",
      "3  0.955172  1.034893 -1.703425 -0.487765\n",
      "4 -0.947702 -0.246026  0.241877 -0.116206\n",
      "--------------------------------------------------\n",
      "          0         1         2         3\n",
      "0  0.040860  0.762460  1.559588  0.263520\n",
      "1  0.847435 -0.000017  1.079594 -0.031536\n",
      "--------------------------------------------------\n",
      "          0         1         2         3\n",
      "0  0.040860  0.762460  1.559588  0.263520\n",
      "1  0.847435 -0.000017  1.079594 -0.031536\n",
      "2 -1.401613 -0.228747  0.267186  0.416540\n"
     ]
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 5.对齐运算"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "s1 = pd.Series(range(10, 20), index = range(10))\n",
    "s2 = pd.Series(range(20, 25), index = range(5))\n",
    "# Series 对齐运算\n",
    "print('s1+s2: ')\n",
    "s3=s1+s2\n",
    "print(s3)  #缺失数据默认是NaN  np.nan"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.330911Z",
     "start_time": "2025-03-03T06:18:27.322548Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s1+s2: \n",
      "0    30.0\n",
      "1    32.0\n",
      "2    34.0\n",
      "3    36.0\n",
      "4    38.0\n",
      "5     NaN\n",
      "6     NaN\n",
      "7     NaN\n",
      "8     NaN\n",
      "9     NaN\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 20
  },
  {
   "cell_type": "code",
   "source": [
    "#两个长度不同的一维ndarray相加\n",
    "a1 = np.array([1,2,3,4,5])\n",
    "a2 = np.array([1]) # 长度为1\n",
    "print(a2.shape)\n",
    "print(a1+a2)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.338907Z",
     "start_time": "2025-03-03T06:18:27.332917Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1,)\n",
      "[2 3 4 5 6]\n"
     ]
    }
   ],
   "execution_count": 21
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.348164Z",
     "start_time": "2025-03-03T06:18:27.340913Z"
    }
   },
   "cell_type": "code",
   "source": [
    "print(s2)\n",
    "s1"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    20\n",
      "1    21\n",
      "2    22\n",
      "3    23\n",
      "4    24\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0    10\n",
       "1    11\n",
       "2    12\n",
       "3    13\n",
       "4    14\n",
       "5    15\n",
       "6    16\n",
       "7    17\n",
       "8    18\n",
       "9    19\n",
       "dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 22
  },
  {
   "cell_type": "code",
   "source": [
    "print(np.isnan(s3[6]))\n",
    "print('-'*50)\n",
    "print(s2.add(s1, fill_value = 0))  #未对齐的数据将和填充值做运算\n",
    "print(s2.sub(s1, fill_value = 0))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.357099Z",
     "start_time": "2025-03-03T06:18:27.349168Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "--------------------------------------------------\n",
      "0    30.0\n",
      "1    32.0\n",
      "2    34.0\n",
      "3    36.0\n",
      "4    38.0\n",
      "5    15.0\n",
      "6    16.0\n",
      "7    17.0\n",
      "8    18.0\n",
      "9    19.0\n",
      "dtype: float64\n",
      "0    10.0\n",
      "1    10.0\n",
      "2    10.0\n",
      "3    10.0\n",
      "4    10.0\n",
      "5   -15.0\n",
      "6   -16.0\n",
      "7   -17.0\n",
      "8   -18.0\n",
      "9   -19.0\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 23
  },
  {
   "cell_type": "code",
   "source": [
    "#df的对齐运算\n",
    "import numpy as np\n",
    "df1 = pd.DataFrame(np.ones((2,2)), columns = ['a', 'b'])\n",
    "df2 = pd.DataFrame(np.ones((3,3)), columns = ['a', 'b', 'c'])\n",
    "print(df1)\n",
    "print(df2)\n",
    "print('-'*50)\n",
    "print(df2.dtypes)\n",
    "print(df1-df2)\n",
    "print(df2.sub(df1, fill_value = 2)) #未对齐的数据将和填充值做运算"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-03-03T06:18:27.378777Z",
     "start_time": "2025-03-03T06:18:27.359102Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     a    b\n",
      "0  1.0  1.0\n",
      "1  1.0  1.0\n",
      "     a    b    c\n",
      "0  1.0  1.0  1.0\n",
      "1  1.0  1.0  1.0\n",
      "2  1.0  1.0  1.0\n",
      "--------------------------------------------------\n",
      "a    float64\n",
      "b    float64\n",
      "c    float64\n",
      "dtype: object\n",
      "     a    b   c\n",
      "0  0.0  0.0 NaN\n",
      "1  0.0  0.0 NaN\n",
      "2  NaN  NaN NaN\n",
      "     a    b    c\n",
      "0  0.0  0.0 -1.0\n",
      "1  0.0  0.0 -1.0\n",
      "2 -1.0 -1.0 -1.0\n"
     ]
    }
   ],
   "execution_count": 24
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# 总结：没对齐的元素，默认填充NaN，对齐运算时，fill_value参数可以指定填充值。"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
